{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'C:\\\\Users\\\\Lickry\\\\Desktop\\\\交互数据\\\\final project\\\\city_chart.html'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyecharts import charts, options\n",
    "import pandas as pd\n",
    "\n",
    "data = pd.read_excel('jobs.xlsx')\n",
    "city_chart = charts.Bar(init_opts=options.InitOpts(width='1000px', height='600px'))\n",
    "city_pt = data.pivot_table(index='city', values='salary', aggfunc='mean')\n",
    "city_pt = city_pt.sort_values('salary',\n",
    "                              ascending=False)\n",
    "city_chart.add_xaxis(list(city_pt.index.values))\n",
    "city_chart.add_yaxis('', [round(float(x)) for x in list(city_pt['salary'])],\n",
    "                     label_opts=options.LabelOpts(is_show=False)).set_global_opts(\n",
    "    datazoom_opts=options.DataZoomOpts(range_start=0, range_end=10, is_show=True))\n",
    "city_chart.set_global_opts(title_opts=options.TitleOpts('各城市平均薪资',\n",
    "                                                        subtitle='海外地区的招聘职位薪资最高，比国内城市中最高的北京还要高出一万多，一线城市平均薪资可达2万以上，其余各新一线城市也在1-2万之间'))\n",
    "city_chart.render('city_chart.html')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'C:\\\\Users\\\\Lickry\\\\Desktop\\\\交互数据\\\\final project\\\\city_map.html'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data = pd.read_excel('jobs.xlsx')\n",
    "city_pt = city_data.pivot_table(index='city', values='salary', aggfunc='count').sort_values('salary',\n",
    "                                                                                            ascending=False)\n",
    "city_map = charts.Map()\n",
    "city_map.add('', [(x, round(float(city_pt['salary'][x]))) for x in city_pt.index.values],\n",
    "             maptype='china-cities',\n",
    "             label_opts=options.LabelOpts(is_show=False)).set_global_opts(\n",
    "    visualmap_opts=options.VisualMapOpts(min_=min(city_pt['salary']), max_=max(city_pt['salary'])))\n",
    "city_map.set_global_opts(title_opts=options.TitleOpts('全国区域分布图',\n",
    "                                                      subtitle='计算机和大数据相关职位主要分布在京津冀、长三角、珠三角地区'))\n",
    "city_map.render('city_map.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'C:\\\\Users\\\\Lickry\\\\Desktop\\\\交互数据\\\\final project\\\\exp_chart.html'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_kw = pd.read_excel('jobs.xlsx')\n",
    "year = ['不限', '在校/应届', '1年以下', '1-3年', '3-5年', '5-10年', '10年以上']\n",
    "exp_chart = charts.Boxplot()\n",
    "exp_chart.add_xaxis(year)\n",
    "exp_chart.add_yaxis('',\n",
    "                    exp_chart.prepare_data([list(data_kw['salary'].loc[data_kw['workYear'] == x]) for x in year]))\n",
    "exp_chart.set_global_opts(title_opts=options.TitleOpts('不同工作经验对应平均薪资',\n",
    "                                                       subtitle='经验与薪资密切相关，5年以上的薪资中位数已达3万，最高接近10万；'\n",
    "                                                                '应届生薪资中位数超过13500，最高可达3万以上'))\n",
    "exp_chart.render('exp_chart.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'C:\\\\Users\\\\Lickry\\\\Desktop\\\\交互数据\\\\final project\\\\edu_bar.html'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_kw = pd.read_excel('jobs.xlsx')\n",
    "education = ['不限', '大专', '本科', '硕士', '博士']\n",
    "edu_pt = data_kw.pivot_table(index='education', values='salary', aggfunc='count')\n",
    "edu_bar = charts.Pie()\n",
    "edu_bar.add('',\n",
    "            [(x, round(float(edu_pt['salary'][x]))) if x in edu_pt.index.values else None for x in education],\n",
    "            label_opts=options.LabelOpts(formatter='{b},{d}%'))\n",
    "edu_bar.set_global_opts(\n",
    "    title_opts=options.TitleOpts('不同学历要求职位数量占比',\n",
    "                                 subtitle='在约5000条招聘信息中，约87%的职位要求学历在本科及以上，本科以下学历仅10%'),\n",
    "    legend_opts=options.LegendOpts(pos_bottom='0'))\n",
    "edu_bar.render('edu_bar.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('Java', 284), ('C++', 199), ('iOS', 151), ('Android', 151), ('移动端', 128), ('Python', 122), ('MySQL', 94), ('后端', 87), ('数据仓库', 86), ('Hadoop', 81), ('Web前端', 68), ('前端开发', 55), ('Swift', 54), ('ETL', 51), ('C', 50), ('数据挖掘', 49), ('Golang', 46), ('Spark', 45), ('深度学习', 41), ('OC', 39), ('服务器端', 37), ('分布式', 34), ('Vue', 32), ('Kotlin', 30), ('web前端', 30), ('数仓建模', 29), ('Flutter', 28), ('Hive', 27), ('软件开发', 26), ('C/C++', 23), ('DBA', 22), ('架构师', 21), ('数仓工程师', 20), ('Flink', 19), ('数据库', 18), ('React', 17), ('数仓架构', 17), ('HTML5', 16), ('Oracle', 16), ('PHP', 15), ('中间件', 15), ('Javascript', 14), ('图像算法', 14), ('推荐算法', 14), ('Node.js', 13), ('SDK', 13), ('机器学习', 12), ('算法', 12), ('组件化', 11), ('Linux', 10), ('Shell', 10), ('游戏开发', 10), ('CSS', 10), ('HTML', 9), ('docker', 9), ('C#', 9), ('混合开发', 9), ('微服务', 9), ('云计算', 9), ('搜索', 9), ('多线程', 8), ('数据库开发', 8), ('图像处理', 8), ('Framework', 8), ('安全', 7), ('Windows', 7), ('自然语言处理', 7), ('QT', 7), ('spring', 7), ('TypeScript', 6), ('java', 6), ('Spring', 6), ('React native', 6), ('IOS', 6), ('人工智能', 6), ('音视频开发', 6), ('性能优化', 6), ('运维', 6), ('意图识别', 5), ('游戏', 5), ('嵌入式', 5), ('客户端', 5), ('Angular', 5), ('音视频', 5), ('SQL', 5), ('目标检测', 4), ('爬虫', 4), ('数据库运维', 4), ('逆向', 4), ('SQLServer', 4), ('音频编解码', 4), ('数据开发', 4), ('产品', 4), ('数据分析', 4), ('推荐系统', 4), ('kafka', 4), ('linux', 4), ('python', 4), ('python爬虫', 4), ('后端开发', 4), ('医疗健康', 4), ('spring cloud', 4), ('springboot', 4), ('全栈', 4), ('人脸识别', 4), ('.NET', 4), ('springcloud', 4), ('Lua', 4), ('NLP', 4), ('OpenGL', 4), ('DB2', 4), ('MongoDB', 3), ('Angularjs', 3), ('mysql', 3), ('算法工程师', 3), ('Storm', 3), ('语音合成', 3), ('图片识别', 3), ('内核开发', 3), ('语音识别', 3), ('CSS3', 3), ('hbase', 3), ('软件测试', 3), ('IM通讯', 3), ('视频编解码', 3), ('搜索算法', 3), ('Socket技术', 3), ('Hybrid', 3), ('Scala', 3), ('测试', 3), ('模式识别', 3), ('Linux/Unix', 3), ('数据库管理员', 3), ('GO', 3), ('Redis', 3), ('JS', 3), ('自动化', 2), ('DevOps/AIOps', 2), ('dubbo', 2), ('OSG', 2), ('自动化测试', 2), ('clickhouse', 2), ('go', 2), ('数据服务', 2), ('jQuery', 2), ('系统运维', 2), ('自动驾驶', 2), ('数据架构', 2), ('K8s/Docker', 2), ('JVM', 2), ('BIM', 2), ('slam', 2), ('社交媒体', 2), ('数据采集', 2), ('redis', 2), ('系统架构', 2), ('计算机视觉', 2), ('英伟达', 2), ('TensoFlow', 2), ('Spring boot', 2), ('SpringBoot', 2), ('SpringCloud', 2), ('海思', 2), ('ES6', 2), ('J2EE', 2), ('安全专家', 2), ('数据', 2), ('大数据运维', 2), ('机顶盒', 2), ('广告算法', 2), ('MFC', 2), ('大数据', 2), ('Dubbo', 2), ('推荐', 2), ('vue', 2), ('高并发', 2), ('高通', 2), ('企业软件', 2), ('CV', 1), ('服务端', 1), ('智能网联研发方', 1), ('时间序列', 1), ('服务机器人', 1), ('服务器开发', 1), ('智能硬件', 1), ('RTMP/HLS', 1), ('文本生成', 1), ('文本分类', 1), ('技术管理', 1), ('Flask', 1), ('Flash', 1), ('插件化', 1), ('FFmpeg', 1), ('政府关系', 1), ('ElasticSearch', 1), ('ERP', 1), ('ECLIPSE', 1), ('数据仓库模型', 1), ('数据埋点', 1), ('数据安全', 1), ('Docker/Kuberne', 1), ('Docker', 1), ('Django', 1), ('数据抓取', 1), ('Db2', 1), ('数据治理', 1), ('数据结构', 1), ('架构', 1), ('瑞芯微', 1), ('CTR', 1), ('运维管理', 1), ('视频/监控分析', 1), ('视频流转码', 1), ('视频识别', 1), ('语音处理', 1), ('课程研发', 1), ('路径规划', 1), ('Android测试', 1), ('软件集成', 1), ('运筹', 1), ('运筹优化', 1), ('AR', 1), ('运维开发', 1), ('运营', 1), ('架构开发', 1), ('API', 1), ('避障', 1), ('ACM', 1), ('音视频处理', 1), ('3d 游戏', 1), ('音频算法', 1), ('3D（游戏）', 1), ('风控', 1), ('风险分析', 1), ('风险管理', 1), ('驱动开发', 1), ('高性能计算', 1), ('视觉算法', 1), ('规则开发', 1), ('英语', 1), ('Android系统', 1), ('桌面软件产品', 1), ('流媒体转封装', 1), ('测试开发', 1), ('消息队列', 1), ('COCOS2D-X', 1), ('清洗', 1), ('CI/CD', 1), ('CDN', 1), ('CAD', 1), ('爬虫工程师', 1), ('物流', 1), ('GB28181', 1), ('电商营销平台', 1), ('白盒测试', 1), ('监控', 1), ('BigQuery', 1), ('神经网络', 1), ('Apache火花', 1), ('算法优化', 1), ('算法加速', 1), ('网络TCP/UDP', 1), ('网络加速', 1), ('网络协议', 1), ('网络架构', 1), ('网络调度', 1), ('手游', 1), ('存储', 1), ('性能测试', 1), ('saas', 1), ('OpenCV', 1), ('Object-C开发语', 1), ('XCode', 1), ('XHTML/XML', 1), ('Xcode', 1), ('Zabbix/Prometheus', 1), ('app测试', 1), ('c++', 1), ('canvas、svg', 1), ('cloud', 1), ('elasticsearch', 1), ('electron', 1), ('framework', 1), ('gradle', 1), ('hadoop', 1), ('ORACLE', 1), ('NodeJS', 1), ('k8s', 1), ('Nginx', 1), ('leveldb', 1), ('native', 1), ('object-c', 1), ('openGL ES', 1), ('oracle', 1), ('Mysql', 1), ('MySql', 1), ('qt', 1), ('Weblogic', 1), ('Vue、react', 1), ('OpenCascade', 1), ('PCR框架', 1), ('ROS', 1), ('Ruby', 1), ('Qt', 1), ('SLAM', 1), ('SPRING', 1), ('QUIC', 1), ('Python讲师', 1), ('SQLite', 1), ('SRT', 1), ('SSH', 1), ('Perl', 1), ('Paas', 1), ('Spring Cloud', 1), ('VR', 1), ('Springboot', 1), ('Openresty', 1), ('TCP', 1), ('TCP/IP', 1), ('TV端', 1), ('Tornado', 1), ('Typescript', 1), ('UDP', 1), ('Unity 3D', 1), ('Unity3d', 1), ('Unix', 1), ('VB', 1), ('rust', 1), ('sdk', 1), ('微架构', 1), ('shader', 1), ('医学影像', 1), ('医疗器械', 1), ('医疗影像诊断', 1), ('可视化', 1), ('JNI', 1), ('周末双休', 1), ('售前', 1), ('图像分割', 1), ('JMV', 1), ('图像识别', 1), ('图形', 1), ('图形图像处理', 1), ('图形引擎', 1), ('IT支持', 1), ('大数据开发', 1), ('RPA', 1), ('ISO27001', 1), ('安卓', 1), ('安卓应用', 1), ('安卓研发', 1), ('容器化', 1), ('少儿编程', 1), ('工业互联网', 1), ('工业视觉检测', 1), ('平台', 1), ('底层', 1), ('Hive SQL', 1), ('功能测试', 1), ('Jenkins', 1), ('前端', 1), ('业务运维', 1), ('spark', 1), ('MySQl', 1), ('spring boot', 1), ('MR', 1), ('15薪', 1), ('ssm', 1), ('web', 1), ('webgl', 1), ('webpack', 1), ('MQ', 1), ('windows', 1), ('三维图像视觉', 1), ('中文分词', 1), ('创客导师', 1), ('中间件运维', 1), ('MES', 1), ('互联网', 1), ('产品经理', 1), ('LINUX', 1), ('传感器', 1), ('信息检索', 1), ('Kafka', 1), ('其他', 1), ('内存管理编程', 1), ('几何算法', 1), ('分布式技术', 1), ('黑盒测试', 1)]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'C:\\\\Users\\\\Lickry\\\\Desktop\\\\交互数据\\\\final project\\\\wc.html'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.read_excel('jobs.xlsx')\n",
    "data['skillLables'] = data['skillLables'].apply(lambda x: x.split(',') if not pd.isna(x) else x)\n",
    "data = data.explode('skillLables')\n",
    "labels = data.pivot_table(index='skillLables', values='positionId', aggfunc='count')\n",
    "labels = labels.sort_values('positionId', ascending=False)\n",
    "labels_chart2 = charts.WordCloud()\n",
    "labels_chart2.add('', [(x[0], int(x[1]['positionId'])) for x in labels.iterrows()])\n",
    "labels_chart2.set_global_opts(title_opts=options.TitleOpts('招聘信息技能要求词云图',\n",
    "                                                           subtitle='java、android、C++依然是最热门的技能标签，python、数仓也有很高热度'))\n",
    "print([(x[0], x[1]['positionId']) for x in labels.iterrows()])\n",
    "labels_chart2.render('wc.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'C:\\\\Users\\\\Lickry\\\\Desktop\\\\交互数据\\\\final project\\\\skill.html'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.read_excel('jobs.xlsx')\n",
    "data['skillLables'] = data['skillLables'].apply(lambda x: x.split(',') if not pd.isna(x) else x)\n",
    "data = data.explode('skillLables')\n",
    "labels = data.pivot_table(index='skillLables', values='positionId', aggfunc='count')\n",
    "labels = labels.sort_values('positionId', ascending=False).head(20)\n",
    "labels_chart1 = charts.Bar()\n",
    "labels_chart1.add_xaxis(list(labels.index.values))\n",
    "labels_chart1.add_yaxis('', list(labels['positionId']), color='#87CEEB').set_global_opts(\n",
    "    datazoom_opts=options.DataZoomOpts(range_start=0, range_end=30))\n",
    "labels_chart1.set_global_opts(title_opts=options.TitleOpts('各技能标签招聘职位数量',\n",
    "                                                           subtitle='在所有技能标签中，java高居榜首，其次是C++，ios和安卓紧随其后'))\n",
    "labels_chart1.render('skill.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
